<?php
// Display errors in the output, but only report fatal ones.
ini_set( 'display_errors','on' );
error_reporting( E_ERROR );

require_once 'common.php';
require_once 'oneItem.php';

// never time out
set_time_limit(0);

date_default_timezone_set('Asia/Shanghai');

// Base URL of the resource list. saveOnePage() appends the pager link of each
// page to $prefix to build the URL of the next page.
$prefix = "http://www.yyets.com/php/resourcelist";
// First page to crawl (same address as the prefix).
$url = $prefix;

// Timestamp of the previous crawl, read from CRAWLER_TIME_FILE
// (constant presumably defined in one of the required files -- confirm).
// 0 means "no usable previous crawl time", so no item is considered old.
$lastCrawlerTime = 0;
if( is_readable( CRAWLER_TIME_FILE ) ){
    $storedTime = file_get_contents( CRAWLER_TIME_FILE );
    $parsedTime = ( $storedTime === false ) ? false : strtotime( trim( $storedTime ) );
    if( $parsedTime !== false ){
        $lastCrawlerTime = $parsedTime;
    }
}

// delete the delta file left over from the previous crawl
if(file_exists(CRAWLER_DELTA_FILE)){
    unlink( CRAWLER_DELTA_FILE );
}
// main entry
saveOnePage($url);

/**
 * Crawl one list page and every following page reachable through the pager.
 *
 * For each list entry the type, detail URL, image URL, title and update time
 * are extracted with GetInnerString() and handed to saveOneItem(). Crawling
 * stops (the whole script exits) at the first entry whose update time is more
 * than five minutes older than $lastCrawlerTime; at that point the current
 * time is written to CRAWLER_TIME_FILE.
 *
 * Uses the globals $prefix (prepended to the pager link to build the next
 * page URL) and $lastCrawlerTime (timestamp of the previous crawl).
 *
 * NOTE(review): if the last page is reached without hitting an old entry,
 * CRAWLER_TIME_FILE is never updated -- confirm whether that is intended.
 *
 * @param string $pageUrl URL of the first list page to process.
 * @return void
 */
function saveOnePage( $pageUrl ){
    global $prefix, $lastCrawlerTime;

    // Iterate over the pages instead of recursing, so the HTML of earlier
    // pages is not kept alive on the call stack during a long crawl.
    $currentUrl = $pageUrl;
    while( true ){
        $html = file_get_contents($currentUrl);
        if( $html === false ){
            error_log( "failed to fetch list page ".$currentUrl."\n" , 3, "fetch_detail.log");
            return;
        }

        // The list lives between the 'box_4 res_listview' marker and the next
        // clearfix div. explode() replaces split(), which was removed in
        // PHP 7.0; all delimiters here are literal strings.
        $parts = explode( 'box_4 res_listview', $html );
        if( !isset($parts[1]) ){
            error_log( "list marker not found in ".$currentUrl."\n" , 3, "fetch_detail.log");
            return;
        }
        $parts = explode( '<div class="clearfix"></div>', $parts[1] );
        $content = $parts[0];
        $items = explode( "</li>" , $content );

        // The fragment after the last </li> holds the pager.
        $pager = array_splice($items, -1, 1);
        // The next page is the first link after the entry marked as current.
        $parts = explode( "class='cur'", $pager[0] );
        $remain = isset($parts[1]) ? $parts[1] : '';
        $nextPage = GetInnerString( $remain, "href='", "'");

        foreach( $items as $item ){
            $itemType = GetInnerString( $item,"\">【", "】<strong>" );
            $url = GetInnerString( $item, "href=\"", "\"><img src");
            $imgUrl = GetInnerString( $item, "<img src=\"", "\"></a>");
            $title = GetInnerString( $item, "<strong>", "</strong>");
            $updateTime = GetInnerString( $item, "【更新】</font> ", "\|");

            echo $title." ".$url." ".$imgUrl." ".$updateTime."<br />\n";
            error_log( "processing....".$title." ".$url." ".$imgUrl." ".$updateTime."\n" , 3, "fetch_detail.log");
            $oneItem = array();
            $oneItem['type'] = $itemType;
            $oneItem['url'] = $url;
            $oneItem['imgUrl'] = $imgUrl;
            $oneItem['title'] = $title;
            $oneItem['updateTime'] = $updateTime;

            // NOTE(review): strtotime() returns false for an unparseable
            // string; false + 300 evaluates to 300, so such an entry trips
            // the stop condition below. Confirm this is acceptable.
            $updateTimestamp = strtotime( $updateTime );
            // Allow 5 minutes of overlap with the previous crawl.
            if( $lastCrawlerTime > $updateTimestamp+300 ){
                file_put_contents( CRAWLER_TIME_FILE, date('Y-m-d H:i:s', time()) );
                error_log( "no new content to get, existing....\n" , 3, "fetch_detail.log");
                exit();
            }
            saveOneItem($url, $oneItem);
            // slow down
            sleep(1);
        }

        if( empty($nextPage) ){
            break;
        }
        $currentUrl = $prefix.$nextPage;
    }
}


?>
